/*
Purpose: clean data on low birth weight from NBER natality data
*/

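* Uncomment and run the block below once to download the raw files from NBER.
* The 1994-2001 files are saved under the same natalityusYYYY name as earlier years.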
/*
foreach year of numlist 1977/1993 {
	use "https://data.nber.org/nvss/natality/dta/`year'/natalityus`year'.dta", clear
	save "data/raw/NBER natality/natalityus`year'.dta", replace
}


foreach year of numlist 1994/2001 {
	use "https://data.nber.org/nvss/natality/dta/`year'/natality`year'us.dta", clear
	save "data/raw/NBER natality/natalityus`year'.dta", replace
}
*/

* Build one state-by-year file of birth counts per natality year.
* Each pass loads a raw yearly file, flags low and very low birth weight
* records from birwt12, and collapses to counts by state of residence.
* Results are held in tempfiles named birwt<year> for later appending.
* NOTE(review): birwt12 is presumably the NCHS 12-category birth weight
* recode (1-5 = under 2500 g, 1-3 = under 1500 g, 12 = not stated);
* confirm against the codebook.
forvalues year = 1977/2001 {
	use "data/raw/NBER natality/natalityus`year'.dta", clear
	gen year = `year'
	gen lowbirwt = birwt12 <= 5
	gen vlowbirwt = birwt12 <= 3
	// In Stata a missing value compares as not equal to 12, so exclude
	// missing explicitly. Otherwise a record with missing birwt12 would
	// enter the denominator as a stated-weight, non-low birth.
	gen totalwt = birwt12 != 12 & !missing(birwt12)

	// Sanity check: log the dbirwt summary within each flagged group
	sum dbirwt if lowbirwt
	sum dbirwt if vlowbirwt

	// Restrict to records with a stated weight. totalwt becomes the number
	// of such records, lowbirwt and vlowbirwt the number flagged.
	collapse (count) totalwt (sum) lowbirwt vlowbirwt if totalwt, by(stateres year)
	tempfile birwt`year'
	save `birwt`year'', replace
}

* State crosswalk: one row per distinct pairing of stateres with stresfip,
* read from the 2001 natality file and held in tempfile stxwalk.
tempfile stxwalk
use stateres stresfip using "data/raw/NBER natality/natalityus2001.dta", clear
duplicates drop
save `stxwalk', replace

* Stack the yearly state-level files into one panel, starting from 1977.
use `birwt1977', clear
forvalues yr = 1978/2001 {
	append using `birwt`yr''
}

* Attach stresfip to each state-year row via the stateres crosswalk and
* keep only rows found in both files.
merge m:1 stateres using `stxwalk'
drop if _merge != 3 // one 1977 code never used again (100 births)
drop _merge

* Exclude the "00" residence code, then switch to the output variable names.
keep if stresfip != "00"
rename (stresfip year) (fips_birth yob)

* Shares of stated-weight births that are low / very low birth weight
* (stored as fractions of totalwt, not multiplied by 100).
generate lowbirwt_pct = lowbirwt / totalwt
generate vlowbirwt_pct = vlowbirwt / totalwt

* Final file: one numeric state code, birth year, and the two shares.
keep fips_birth yob lowbirwt_pct vlowbirwt_pct
destring fips_birth, replace
save "data/clean/birthweight_data", replace
